@@ -0,0 +1,214 @@ |
||
1 |
+require 'net/ftp' |
|
2 |
+require 'uri' |
|
3 |
+require 'time' |
|
4 |
+ |
|
5 |
+module Agents |
|
6 |
+ class FtpsiteAgent < Agent |
|
7 |
+ cannot_receive_events! |
|
8 |
+ |
|
9 |
+ default_schedule "every_12h" |
|
10 |
+ |
|
11 |
+ description <<-MD |
|
12 |
+ The FtpsiteAgent checks a FTP site and creates Events based on newly uploaded files in a directory. |
|
13 |
+ |
|
14 |
+ Specify a `url` that represents a directory of an FTP site to watch, and a list of `patterns` to match against file names. |
|
15 |
+ |
|
16 |
+ Login credentials can be included in `url` if authentication is required. |
|
17 |
+ |
|
18 |
+ Only files with a last modification time later than the `after` value, if specifed, are notified. |
|
19 |
+ MD |
|
20 |
+ |
|
21 |
+ event_description <<-MD |
|
22 |
+ Events look like this: |
|
23 |
+ |
|
24 |
+ { |
|
25 |
+ "url": "ftp://example.org/pub/releases/foo-1.2.tar.gz", |
|
26 |
+ "filename": "foo-1.2.tar.gz", |
|
27 |
+ "timestamp": "2014-04-10T22:50:00Z" |
|
28 |
+ } |
|
29 |
+ MD |
|
30 |
+ |
|
31 |
+ def working? |
|
32 |
+ event_created_within?(options['expected_update_period_in_days']) && !recent_error_logs? |
|
33 |
+ end |
|
34 |
+ |
|
35 |
+ def default_options |
|
36 |
+ { |
|
37 |
+ 'expected_update_period_in_days' => "1", |
|
38 |
+ 'url' => "ftp://example.org/pub/releases/", |
|
39 |
+ 'patterns' => [ |
|
40 |
+ 'foo-*.tar.gz', |
|
41 |
+ ], |
|
42 |
+ 'after' => Time.now.iso8601, |
|
43 |
+ } |
|
44 |
+ end |
|
45 |
+ |
|
46 |
+ def validate_options |
|
47 |
+ # Check for required fields |
|
48 |
+ begin |
|
49 |
+ url = options['url'] |
|
50 |
+ String === url or raise |
|
51 |
+ uri = URI(url) |
|
52 |
+ URI::FTP === uri or raise |
|
53 |
+ errors.add(:base, "url must end with a slash") unless uri.path.end_with?('/') |
|
54 |
+ rescue |
|
55 |
+ errors.add(:base, "url must be a valid FTP URL") |
|
56 |
+ end |
|
57 |
+ |
|
58 |
+ patterns = options['patterns'] |
|
59 |
+ case patterns |
|
60 |
+ when Array |
|
61 |
+ if patterns.empty? |
|
62 |
+ errors.add(:base, "patterns must not be empty") |
|
63 |
+ end |
|
64 |
+ when nil, '' |
|
65 |
+ errors.add(:base, "patterns must be specified") |
|
66 |
+ else |
|
67 |
+ errors.add(:base, "patterns must be an array") |
|
68 |
+ end |
|
69 |
+ |
|
70 |
+ # Check for optional fields |
|
71 |
+ if (timestamp = options['timestamp']).present? |
|
72 |
+ begin |
|
73 |
+ Time.parse(timestamp) |
|
74 |
+ rescue |
|
75 |
+ errors.add(:base, "timestamp cannot be parsed as time") |
|
76 |
+ end |
|
77 |
+ end |
|
78 |
+ |
|
79 |
+ if options['expected_update_period_in_days'].present? |
|
80 |
+ errors.add(:base, "Invalid expected_update_period_in_days format") unless is_positive_integer?(options['expected_update_period_in_days']) |
|
81 |
+ end |
|
82 |
+ end |
|
83 |
+ |
|
84 |
+ def check |
|
85 |
+ saving_entries do |found| |
|
86 |
+ each_entry { |filename, mtime| |
|
87 |
+ found[filename, mtime] |
|
88 |
+ } |
|
89 |
+ end |
|
90 |
+ end |
|
91 |
+ |
|
92 |
+ def each_entry |
|
93 |
+ patterns = options['patterns'] |
|
94 |
+ |
|
95 |
+ after = |
|
96 |
+ if str = options['after'] |
|
97 |
+ Time.parse(str) |
|
98 |
+ else |
|
99 |
+ Time.at(0) |
|
100 |
+ end |
|
101 |
+ |
|
102 |
+ open_ftp(base_uri) do |ftp| |
|
103 |
+ log "Listing the directory" |
|
104 |
+ # Do not use a block style call because we need to call other |
|
105 |
+ # commands during iteration. |
|
106 |
+ list = ftp.list('-a') |
|
107 |
+ |
|
108 |
+ month2year = {} |
|
109 |
+ |
|
110 |
+ list.each do |line| |
|
111 |
+ mon, day, smtn, rest = line.split(' ', 9)[5..-1] |
|
112 |
+ |
|
113 |
+ # Remove symlink target part if any |
|
114 |
+ filename = rest[/\A(.+?)(?:\s+->\s|\z)/, 1] |
|
115 |
+ |
|
116 |
+ patterns.any? { |pattern| |
|
117 |
+ File.fnmatch?(pattern, filename) |
|
118 |
+ } or next |
|
119 |
+ |
|
120 |
+ case smtn |
|
121 |
+ when /:/ |
|
122 |
+ if year = month2year[mon] |
|
123 |
+ mtime = Time.parse("#{mon} #{day} #{year} #{smtn} GMT") |
|
124 |
+ else |
|
125 |
+ log "Getting mtime of #{filename}" |
|
126 |
+ mtime = ftp.mtime(filename) |
|
127 |
+ month2year[mon] = mtime.year |
|
128 |
+ end |
|
129 |
+ else |
|
130 |
+ # Do not bother calling MDTM for old files. Losing the |
|
131 |
+ # time part only makes a timestamp go backwards, meaning |
|
132 |
+ # that it will trigger no new event. |
|
133 |
+ mtime = Time.parse("#{mon} #{day} #{smtn} GMT") |
|
134 |
+ end |
|
135 |
+ |
|
136 |
+ after < mtime or next |
|
137 |
+ |
|
138 |
+ yield filename, mtime |
|
139 |
+ end |
|
140 |
+ end |
|
141 |
+ end |
|
142 |
+ |
|
143 |
+ def open_ftp(uri) |
|
144 |
+ ftp = Net::FTP.new |
|
145 |
+ |
|
146 |
+ log "Connecting to #{uri.host}#{':%d' % uri.port if uri.port != uri.default_port}" |
|
147 |
+ ftp.connect(uri.host, uri.port) |
|
148 |
+ |
|
149 |
+ user = |
|
150 |
+ if str = uri.user |
|
151 |
+ URI.decode(str) |
|
152 |
+ else |
|
153 |
+ 'anonymous' |
|
154 |
+ end |
|
155 |
+ password = |
|
156 |
+ if str = uri.password |
|
157 |
+ URI.decode(str) |
|
158 |
+ else |
|
159 |
+ 'anonymous@' |
|
160 |
+ end |
|
161 |
+ log "Logging in as #{user}" |
|
162 |
+ ftp.login(user, password) |
|
163 |
+ |
|
164 |
+ ftp.passive = true |
|
165 |
+ |
|
166 |
+ path = uri.path.chomp('/') |
|
167 |
+ log "Changing directory to #{path}" |
|
168 |
+ ftp.chdir(path) |
|
169 |
+ |
|
170 |
+ yield ftp |
|
171 |
+ ensure |
|
172 |
+ log "Closing the connection" |
|
173 |
+ ftp.close |
|
174 |
+ end |
|
175 |
+ |
|
176 |
+ def base_uri |
|
177 |
+ @base_uri ||= URI(options['url']) |
|
178 |
+ end |
|
179 |
+ |
|
180 |
+ def saving_entries |
|
181 |
+ known_entries = memory['known_entries'] || {} |
|
182 |
+ found_entries = {} |
|
183 |
+ new_files = [] |
|
184 |
+ |
|
185 |
+ yield proc { |filename, mtime| |
|
186 |
+ found_entries[filename] = misotime = mtime.utc.iso8601 |
|
187 |
+ unless (prev = known_entries[filename]) && misotime <= prev |
|
188 |
+ new_files << filename |
|
189 |
+ end |
|
190 |
+ } |
|
191 |
+ |
|
192 |
+ new_files.sort_by { |filename| |
|
193 |
+ found_entries[filename] |
|
194 |
+ }.each { |filename| |
|
195 |
+ create_event :payload => { |
|
196 |
+ 'url' => (base_uri + filename).to_s, |
|
197 |
+ 'filename' => filename, |
|
198 |
+ 'timestamp' => found_entries[filename], |
|
199 |
+ } |
|
200 |
+ } |
|
201 |
+ |
|
202 |
+ memory['known_entries'] = found_entries |
|
203 |
+ save! |
|
204 |
+ end |
|
205 |
+ |
|
206 |
+ private |
|
207 |
+ |
|
208 |
+ def is_positive_integer?(value) |
|
209 |
+ Integer(value) >= 0 |
|
210 |
+ rescue |
|
211 |
+ false |
|
212 |
+ end |
|
213 |
+ end |
|
214 |
+end |
@@ -0,0 +1,79 @@ |
||
1 |
+require 'spec_helper' |
|
2 |
+require 'time' |
|
3 |
+ |
|
4 |
+describe Agents::FtpsiteAgent do |
|
5 |
+ describe "checking anonymous FTP" do |
|
6 |
+ before do |
|
7 |
+ @site = { |
|
8 |
+ 'expected_update_period_in_days' => 1, |
|
9 |
+ 'url' => "ftp://ftp.example.org/pub/releases/", |
|
10 |
+ 'patterns' => ["example-*.tar.gz"], |
|
11 |
+ } |
|
12 |
+ @checker = Agents::FtpsiteAgent.new(:name => "Example", :options => @site, :keep_events_for => 2) |
|
13 |
+ @checker.user = users(:bob) |
|
14 |
+ @checker.save! |
|
15 |
+ stub(@checker).each_entry.returns { |block| |
|
16 |
+ block.call("example-latest.tar.gz", Time.parse("2014-04-01T10:00:01Z")) |
|
17 |
+ block.call("example-1.0.tar.gz", Time.parse("2013-10-01T10:00:00Z")) |
|
18 |
+ block.call("example-1.1.tar.gz", Time.parse("2014-04-01T10:00:00Z")) |
|
19 |
+ } |
|
20 |
+ end |
|
21 |
+ |
|
22 |
+ describe "#check" do |
|
23 |
+ it "should validate the integer fields" do |
|
24 |
+ @checker.options['expected_update_period_in_days'] = "nonsense" |
|
25 |
+ lambda { @checker.save! }.should raise_error; |
|
26 |
+ @checker.options = @site |
|
27 |
+ end |
|
28 |
+ |
|
29 |
+ it "should check for changes and save known entries in memory" do |
|
30 |
+ lambda { @checker.check }.should change { Event.count }.by(3) |
|
31 |
+ @checker.memory['known_entries'].tap { |known_entries| |
|
32 |
+ known_entries.size.should == 3 |
|
33 |
+ known_entries.sort_by(&:last).should == [ |
|
34 |
+ ["example-1.0.tar.gz", "2013-10-01T10:00:00Z"], |
|
35 |
+ ["example-1.1.tar.gz", "2014-04-01T10:00:00Z"], |
|
36 |
+ ["example-latest.tar.gz", "2014-04-01T10:00:01Z"], |
|
37 |
+ ] |
|
38 |
+ } |
|
39 |
+ |
|
40 |
+ Event.last(2).first.payload.should == { |
|
41 |
+ 'url' => 'ftp://ftp.example.org/pub/releases/example-1.1.tar.gz', |
|
42 |
+ 'filename' => 'example-1.1.tar.gz', |
|
43 |
+ 'timestamp' => '2014-04-01T10:00:00Z', |
|
44 |
+ } |
|
45 |
+ |
|
46 |
+ lambda { @checker.check }.should_not change { Event.count } |
|
47 |
+ |
|
48 |
+ stub(@checker).each_entry.returns { |block| |
|
49 |
+ block.call("example-latest.tar.gz", Time.parse("2014-04-02T10:00:01Z")) |
|
50 |
+ |
|
51 |
+ # In the long list format the timestamp may look going |
|
52 |
+ # backwards after six months: Oct 01 10:00 -> Oct 01 2013 |
|
53 |
+ block.call("example-1.0.tar.gz", Time.parse("2013-10-01T00:00:00Z")) |
|
54 |
+ |
|
55 |
+ block.call("example-1.1.tar.gz", Time.parse("2014-04-01T10:00:00Z")) |
|
56 |
+ block.call("example-1.2.tar.gz", Time.parse("2014-04-02T10:00:00Z")) |
|
57 |
+ } |
|
58 |
+ lambda { @checker.check }.should change { Event.count }.by(2) |
|
59 |
+ @checker.memory['known_entries'].tap { |known_entries| |
|
60 |
+ known_entries.size.should == 4 |
|
61 |
+ known_entries.sort_by(&:last).should == [ |
|
62 |
+ ["example-1.0.tar.gz", "2013-10-01T00:00:00Z"], |
|
63 |
+ ["example-1.1.tar.gz", "2014-04-01T10:00:00Z"], |
|
64 |
+ ["example-1.2.tar.gz", "2014-04-02T10:00:00Z"], |
|
65 |
+ ["example-latest.tar.gz", "2014-04-02T10:00:01Z"], |
|
66 |
+ ] |
|
67 |
+ } |
|
68 |
+ |
|
69 |
+ Event.last(2).first.payload.should == { |
|
70 |
+ 'url' => 'ftp://ftp.example.org/pub/releases/example-1.2.tar.gz', |
|
71 |
+ 'filename' => 'example-1.2.tar.gz', |
|
72 |
+ 'timestamp' => '2014-04-02T10:00:00Z', |
|
73 |
+ } |
|
74 |
+ |
|
75 |
+ lambda { @checker.check }.should_not change { Event.count } |
|
76 |
+ end |
|
77 |
+ end |
|
78 |
+ end |
|
79 |
+end |